package com.lvmama.rhino.etl

import com.lvmama.rhino.common.entity.ApiLog
import com.lvmama.rhino.common.utils.Constants
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by yuanxiaofeng on 2016/5/5.
  */
object ApiLogETL {
  private val AppName = "ApiLogETLJob"

  // Number of fields an api-log record must carry (log(0) .. log(19)).
  private val FieldCount = 20

  /**
    * Runs the API-log ETL: reads the text file named by the first argument,
    * splits each line on `Constants.LOG_SEPERATOR_1` into an [[ApiLog]], and
    * shows the first 20 rows of the resulting Dataset.
    *
    * @param master optional Spark master URL; when `None` the master is taken
    *               from the submit environment (e.g. spark-submit)
    * @param args   job arguments; `args.head` is the input path (required)
    * @param jars   jars to ship to the executors
    * @throws IllegalArgumentException if `args` is empty
    */
  def execute(master: Option[String], args: List[String], jars: Seq[String] = Nil): Unit = {
    require(args.nonEmpty, s"$AppName expects the input path as the first argument")

    val sc = {
      val conf = new SparkConf().setAppName(AppName).setJars(jars)
      // Only override the master when one was supplied; otherwise defer to
      // the environment (spark-submit / cluster configuration).
      master.foreach(conf.setMaster)
      new SparkContext(conf)
    }
    try {
      val sqlCtx = new SQLContext(sc)
      import sqlCtx.implicits._

      val file = sc.textFile(args.head)
      val apiLog = file
        // limit = -1 keeps trailing empty fields; the default split() drops
        // them, which made log(19) throw on lines whose last columns are empty.
        .map(_.split(Constants.LOG_SEPERATOR_1, -1))
        // Skip malformed (too-short) lines instead of failing the whole job
        // with an ArrayIndexOutOfBoundsException.
        .filter(_.length >= FieldCount)
        .map(f =>
          ApiLog(f(0), f(1), f(2), f(3), f(4), f(5), f(6), f(7), f(8), f(9),
            f(10), f(11), f(12), f(13), f(14), f(15), f(16), f(17), f(18), f(19)))
        .toDS

      apiLog.show(20)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }

}
